Report last updated Tue May 31 14:52:09 2016.
library(ggplot2)
library(reshape)
library(limma)
library(CHBUtils)
library(biomaRt)
library(dplyr)
library(cluster)
library(gridExtra)
library(logging)
library(org.Mm.eg.db)
library(DEGreport)
library(dplyr)
order_group=c("control", "day1", "day2", "day7", "day14")
# root_path = "~/orch/scratch/vishal_mirna_kidney/publish/FA-model"
result_files = file.path(root_path, "omics/files_publish")
dir.create(result_files, showWarnings = F, recursive = T)
basicConfig(level='INFO')FC > 4 and FDR < 1%
protein_file = file.path(root_path, "protein", "files_publish")
dir.create(protein_file, recursive = T, showWarnings = F)
counts = read.csv(file.path(root_path, "protein", "FA_Proteomics_RawData.csv"),skip = 3) %>% tidyr::separate(Protein.Id, c("sp", "Uniprot.ID", "type"),sep = "[::|::]", extra = "merge")
row.names(counts) = counts$Uniprot.ID
mapping = data.frame(id=rownames(counts))
mapping = annotate_df(mapping, "id", 'mmusculus_gene_ensembl', "uniprot_genename", "ensembl_gene_id") %>% distinct(ensembl_gene_id)
mapping2 = data.frame(id=unique(counts$Gene.Symbol)[!is.na(unique(counts$Gene.Symbol))])
mapping2 = annotate_df(mapping2, "id", 'mmusculus_gene_ensembl', "wikigene_name", "ensembl_gene_id") %>% distinct(ensembl_gene_id)
counts[as.character(mapping$id), "ensembl"] = mapping$ensembl_gene_id
idx = match(as.character(mapping2$id),as.character(counts$Gene.Symbol))
counts[idx, "ensembl"] = mapping2$ensembl_gene_id
counts$symbol = convertIDs(as.character(counts$ensembl), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
save_file(counts, "counts_annotated.csv", protein_file)
counts = counts %>% distinct(ensembl) %>% filter(!is.na(ensembl))
clean_counts = counts[,7:16]
rownames(clean_counts) = counts$ensembl
names(clean_counts) = sapply(names(clean_counts), tolower)
names(clean_counts)[1:10] = c("control-1", "control-2", "day1-1", "day1-2",
"day2-1", "day2-2", "day7-1", "day7-2", "day14-1", "day14-2")
cat("## Expression density of 'raw' data\n\n")ggplot(melt(clean_counts), aes(x=log2(value), colour=variable)) + geom_density()dge = edgeR::DGEList(clean_counts)
dge = edgeR::calcNormFactors(dge, method="TMM")
coldata = data.frame(row.names=colnames(clean_counts), samples=colnames(clean_counts)) %>% tidyr::separate(samples,into = c("group"), extra = "drop")
coldata$group = sapply(coldata$group, tolower)
norm_counts = edgeR::cpm(dge, log=T)
MA <- normalizeBetweenArrays(as.matrix(norm_counts), method="none")
cat("## Expression density of normalize data\n\n")ggplot(melt(as.data.frame(MA)), aes(x=value, colour=variable)) + geom_density()cat("## MDS plot\n\n")mds(MA, condition = coldata$group)save_file(MA, "fa_model_log2_counts.tsv", protein_file)
design <- model.matrix(~ 0 + coldata$group)
colnames(design) = c("day1", "day14", "day2", "day7", "normal")
fit <- lmFit(MA, design)
contrast.matrix <- makeContrasts(day1-normal, day2-day1, day7-day2, day14-day7,
day2-normal, day7-normal, day14-normal,
day7-day1, day7-day2,
day14-day1, day14-day2,
levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
res = topTable(fit2, adjust="BH", n="Inf")
save_file(res, "fa_model.tsv", protein_file)
absMaxFC = rowMax(as.matrix(abs(res[,1:11])))
sign = row.names(res[absMaxFC>2 & res$adj.P.Val<0.01,])
cat("## MDS plot of DE proteins\n\n")mds(MA[sign,], condition = coldata$group)coldata$group = factor(coldata$group, levels=order_group)
cat("## Clustering analysis\n\n")clusters = degPatterns(MA[sign,], coldata, minc = 15, summarize = "group", time="group", col = NULL)Working with 287 genes
.df=clusters$df
save_file(.df, "clusters_genes_cluster.tsv", protein_file)mrna_path = "rnaseq/final/2016-02-05_mrna_bio/files_publish"
mrna_matrix = read.csv(file.path(root_path, mrna_path, "fa_model_log2_counts.tsv"), row.names = 1)
mrna_clusters = read.csv(file.path(root_path, mrna_path, "clusters_genes.tsv"), row.names = 1)
mrna_col = data.frame(row.names=colnames(mrna_matrix), samples=colnames(mrna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mrna_col) = colnames(mrna_matrix)
keep = rownames(mrna_col)[!is.na(mrna_col$group)]
mrna_col = mrna_col[keep,,drop=F]
mrna_matrix = mrna_matrix[,keep]
prot_path = "protein/files_publish"
prot_matrix = read.csv(file.path(root_path, prot_path, "fa_model_log2_counts.tsv"), row.names = 1)
prot_clusters = read.csv(file.path(root_path, prot_path, "clusters_genes_cluster.tsv"), row.names = 1)
prot_col = data.frame(row.names=colnames(prot_matrix), samples=colnames(prot_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(prot_col) = colnames(prot_matrix)
mirna_path = "srnaseq/final/files_publish"
mirna_matrix = read.csv(file.path(root_path, mirna_path, "fa_model_mirna_log2_counts.tsv"), row.names = 1)
load(file.path(root_path, mirna_path, "ma.rda"))
mirna_col = data.frame(row.names=colnames(mirna_matrix), samples=colnames(mirna_matrix)) %>% tidyr::separate(samples,into = c("id", "group"), extra = "drop") %>% mutate(group=factor(gsub("day0", "control", tolower(group)), levels=order_group))
rownames(mirna_col) = colnames(mirna_matrix)
keep = rownames(mirna_col)[!is.na(mirna_col$group)]
mirna_col = mirna_col[keep,,drop=F]
mirna_matrix = mirna_matrix[,keep]library(SummarizedExperiment)
library(isomiRs)
mrna_keep = as.character(mrna_clusters$genes)
prot_keep = unique(c(intersect(mrna_keep, rownames(prot_matrix)), as.character(prot_clusters$genes)))
keep = intersect(mrna_keep, rownames(prot_matrix))
df = degMerge(list(mrna=mrna_matrix[keep,], prot=prot_matrix[keep,]),
list(mrna=mrna_clusters),
list(mrna=mrna_col, prot=prot_col),
summarize="group",
time="group", col=NULL,
scale=TRUE)Working with 62 genes
Working with 85 genes
Working with 34 genes
Working with 14 genes
Working with 11 genes
Working with 25 genes
Working with 13 genes
Working with 8 genes
Working with 14 genes
Working with 5 genes
Working with 5 genes
Working with 8 genes
Working with 5 genes
Working with 11 genes
Working with 6 genes
prot_merged = do.call(rbind, df)
prot_merged$name = convertIDs(as.character(prot_merged$gene), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(prot_merged) = paste0("prot_", names(prot_merged))
mi_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mirna_matrix)), colData= mirna_col)
gene_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mrna_matrix)), colData=mrna_col)
cor_tar = find_targets(mi_rse, gene_rse, ma)Dimmension of cor matrix: 73 7548
mapping = melt(cor_tar) %>% filter(value<0)
keep = intersect(mrna_keep, as.character(unique(mapping[,1])))
mirna_keep = as.character(unique(mapping[,2]))
df = degMerge(list(mrna=mrna_matrix[keep,],
mirna=mirna_matrix[mirna_keep,]),
list(mrna=mrna_clusters),
list(mrna=mrna_col, mirna=mirna_col),
summarize="group",
time="group", col=NULL,
scale=TRUE, mapping=list(mirna=mapping))Working with 6 genes
Working with 8 genes
Working with 5 genes
mirna_merged = do.call(rbind, df)
mirna_merged$name = convertIDs(as.character(mirna_merged$pair), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(mirna_merged) = paste0("mirna_", names(mirna_merged))
merged = merge(mirna_merged[mirna_merged$mirna_cor < -.6,c(6,7,1:5)], prot_merged[prot_merged$prot_cor > .6,c(5,7,1:4)], by=1, all=TRUE)
merged_all = merge(mirna_merged[c(6,7,1:5)], prot_merged[c(5,7,1:4)], by=1, all=TRUE)
save_file(merged, "omics_filtered.csv", result_files)
save_file(merged_all, "omics.csv", result_files)
save_file(prot_merged, "prot_omics.csv", result_files)
save_file(mirna_merged, "mirna_omics.csv", result_files)(useful if replicating these results)
sessionInfo()R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux stretch/sid
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 parallel methods stats graphics grDevices utils
[8] datasets base
other attached packages:
[1] isomiRs_0.99.18 DiscriMiner_0.1-29
[3] SummarizedExperiment_1.2.2 GenomicRanges_1.24.0
[5] GenomeInfoDb_1.8.1 DEGreport_1.5.0
[7] quantreg_5.24 SparseM_1.7
[9] org.Mm.eg.db_3.3.0 AnnotationDbi_1.34.3
[11] IRanges_2.6.0 S4Vectors_0.10.1
[13] Biobase_2.32.0 BiocGenerics_0.18.0
[15] logging_0.7-103 gridExtra_2.2.1
[17] cluster_2.0.4 dplyr_0.4.3
[19] biomaRt_2.28.0 CHBUtils_0.1
[21] limma_3.28.5 reshape_0.8.5
[23] ggplot2_2.1.0 knitr_1.13
[25] myRfunctions_0.1 rmarkdown_0.9.6
[27] BiocInstaller_1.22.2
loaded via a namespace (and not attached):
[1] edgeR_3.14.0 splines_3.3.0 gtools_3.5.0
[4] Formula_1.2-1 assertthat_0.1 latticeExtra_0.6-28
[7] yaml_2.1.13 RSQLite_1.0.0 lattice_0.20-33
[10] chron_2.3-47 digest_0.6.9 RColorBrewer_1.1-2
[13] XVector_0.12.0 colorspace_1.2-6 htmltools_0.3.5
[16] Matrix_1.2-6 plyr_1.8.3 DESeq2_1.12.3
[19] XML_3.98-1.4 genefilter_1.54.2 zlibbioc_1.18.0
[22] xtable_1.8-2 scales_0.4.0 gdata_2.17.0
[25] BiocParallel_1.6.2 MatrixModels_0.4-1 annotate_1.50.0
[28] lazyeval_0.1.10 nnet_7.3-12 survival_2.39-4
[31] magrittr_1.5 evaluate_0.9 GGally_1.0.1
[34] gplots_3.0.1 foreign_0.8-66 tools_3.3.0
[37] data.table_1.9.6 formatR_1.4 stringr_1.0.0
[40] locfit_1.5-9.1 munsell_0.4.3 caTools_1.17.1
[43] grid_3.3.0 RCurl_1.95-4.8 labeling_0.3
[46] bitops_1.0-6 codetools_0.2-14 gtable_0.2.0
[49] DBI_0.4-1 R6_2.1.2 Nozzle.R1_1.1-1
[52] Hmisc_3.17-4 KernSmooth_2.23-15 stringi_1.1.1
[55] Rcpp_0.12.5 geneplotter_1.50.0 rpart_4.1-10
[58] acepack_1.3-3.3 coda_0.18-1